# ###############################
#GENDER AND COLLABORATION, Lorenzo Ductor, Sanjeev Goyal and Anja Prummer
# Created by: Gustavo Nicolas Paez
# Code to compute the eigenfactor and article influence score for different values of the parameter alpha
# Imput: cross-citation matrices obtained in Stata running the code eigenfactor builder.do
# Output: the eigenfactor and article influence score of 100 journals per year
##########################


#Pair Citation Network
library(igraph)
library(readr)
library(readxl)

#Step 1: Define the lag
lag=5
result=matrix(0,0,14)
for(year in (1969+lag):2017){
  #Step 2: Import the citation matrix and the articles per journal from the Stata Output (eigenfactor builder.do)
  articlesperJournal <- as.matrix(read_excel(paste("articlesperJournal.xls",sep="")))
  articles=articlesperJournal[articlesperJournal[,2]==year,]
  citationMatrix <- as.matrix(read_excel(paste("citationMatrix",year,".xls",sep="")))
  position=1:length(articles[,1])
  transMatrix=matrix(0,length(articles[,1]),length(articles[,1]))
  for(i in 1:length(articles[,1])){
    for(j in 1:length(articles[,1])){
      temp=as.numeric(citationMatrix[citationMatrix[,3]%in%articles[i,1]&citationMatrix[,1]%in%articles[j,1],4])
      if(length(temp)!=0&i!=j){
        transMatrix[i,j]=temp[1]
      }
    } 
  }
  #Calculate the share of articles
  a=as.matrix(as.numeric(articles[,3])/sum(as.numeric(articles[,3])))
  #Calculate H adj
  sumZ=as.matrix(colSums(transMatrix))
  ones=matrix(1,1,length(articles[,1]))
  invSumZ=t((sumZ^-1)%*%ones)
  transMatrixAdj=transMatrix*invSumZ
  H=transMatrixAdj
  H[is.na(H)]=0
  cal=as.matrix(colSums(transMatrixAdj))
  newa=a%*%matrix(1,1,sum(is.na(cal)))
  transMatrixAdj[,is.na(cal)]=newa
  #Calculate P
  res=articles[,1:2]
  for (alpha in c(0.5,0.6,0.7,0.8,0.9,1)){
    B=a%*%matrix(1,1,length(a))
    P=alpha*transMatrixAdj+(1-alpha)*B
    ei=-Re(eigen(P)$vectors[,1])
#Calculate article influence and eigenfactor
    EF=100*(H%*%ei)/sum(H%*%ei)
    AI=EF*0.01/a
    res=cbind(res,EF,AI)
  }
  result=rbind(result,res)
}
colnames(result)=c("v5","v3","EF05","AI05","EF06","AI06","EF07","AI07","EF08","AI08","EF09","AI09","EF10","AI10")
write.csv(result, paste("eigenFactorDataLag",lag,".csv",sep=""))
